set.seed(2021)
# library(devtools)
# install_github("welch-lab/liger", ref = "U_algorithm")
library(liger)
First, read in your datasets. For this tutorial, we will use the osmFISH dataset (33 genes by 5,219 cells), and a downsampled single-cell RNA-seq dataset (29,463 genes by 15,000 cells). The datasets can be downloaded at https://www.dropbox.com/sh/y9kjoum8u469nj1/AADik2b2-Qo3os2QSWXdIAbna?dl=0
osmFISH = readRDS("../../UINMF/OSMFISH.vin.RDS")
rna = readRDS("../../UINMF/DROPVIZ.vin.RDS")
Next, create your Liger object, submitting the datasets in list format. The unshared features should not be subsetted out, or submitted separately. Rather, they should be included in the matrix submitted for that dataset. For example,the scRNA-seq data is submitted in its entirety, the unshared features are not submitted separately. This helps ensure proper normalization.
osm.liger <- createLiger(list(osmFISH = osmFISH, rna = rna))
## [1] "Removing 34 cells not expressed any genes in osmFISH."
## [1] "Removing 4910 genes not expressed in rna."
Normalize the datasets.The normalization is applied to the datasets in their entirety.
osm.liger <- normalize(osm.liger)
To include unshared features in your analysis, set the unshared parameter to TRUE when selecting genes.When selecting the unshared features, it is necessary to include a list of what datasets unshared features should be included for. For instance, in this case, we wish to include the unshared features from the RNA dataset, the second dataset in our analysis. We provide an individual tuning threshold for unshared features. If a single value is submitted, that threshold is applied to all datasets’ unshared features. If multiple datasets include unshared features, the user can specify an individual thresholds for each dataset by submitting a list of thresholds the same length as the number of datasets with unshared datasets. The variable unshared features willl be stored in liger@var.unshared.features.
osm.liger <- selectGenes(osm.liger, unshared = TRUE, unshared.datasets = list(2), unshared.thresh= 0.4)
## [1] "Selected 1327 unshared features from rna Dataset"
The scaleNotCenter functions will scale both the shared and unshared features. The scaled unshared features will be stored in liger@scale.unshared.data
osm.liger <- scaleNotCenter(osm.liger)
## [1] "Removing 344 genes not expressed in rna."
To factorize the datasets including unshared datasets, set the use.unshared parameter to TRUE.
osm.liger <- optimizeALS(osm.liger, k=30, use.unshared = TRUE)
## [1] "Performing Factorization using UINMF"
## [1] "Processing"
##
|
| | 0%
|
|===== | 7%
|
|======= | 10%
|
|========= | 13%
|
|============ | 17%
|
|============== | 20%
|
|================ | 23%
|
|=================== | 27%
|
|===================== | 30%
|
|======================= | 33%
|
|========================== | 37%
|
|============================ | 40%
|
|============================== | 43%
|
|================================= | 47%
|
|=================================== | 50%
|
|===================================== | 53%
|
|======================================== | 57%
|
|========================================== | 60%
|
|============================================ | 63%
|
|=============================================== | 67%
|
|================================================= | 70%
|
|=================================================== | 73%
|
|====================================================== | 77%
|
|======================================================== | 80%
|
|========================================================== | 83%
|
|============================================================= | 87%
|
|=============================================================== | 90%
|
|================================================================= | 93%
|
|==================================================================== | 97%
|
|======================================================================| 100%
## Current seed 1 current objective 18918597
## Best results with seed 1.
After factorization, the resulting Liger object can used in all downstream LIGER functions without adjustment. The default reference dataset for quantile normalization is the larger dataset, but the user should select the higher quality dataset as the reference dataset, even if it is the smaller dataset.
osm.liger <- quantile_norm(osm.liger, ref_dataset= "rna")
osm.liger <- louvainCluster(osm.liger)
osm.liger <- runUMAP(osm.liger)
Next, we can visualize our returned factorized object by dataset to check the alignment between datasets, as well as by cluster determined in the factorization.
umap_plots <-plotByDatasetAndCluster(osm.liger, axis.labels = c("UMAP1","UMAP2"), return.plots = TRUE)
umap_plots[[1]]
umap_plots[[2]]
We can also examine features such as gene expression.
Pdgfra <- plotGene(osm.liger, "Pdgfra", axis.labels = c('UMAP 1', 'UMAP 2'), return.plots = TRUE)
Bmp4 <- plotGene(osm.liger, "Bmp4", axis.labels = c('UMAP 1', 'UMAP 2'), return.plots = TRUE)
plot_grid(Pdgfra[[2]],Pdgfra[[1]],Bmp4[[2]],Bmp4[[1]], ncol=2)
and check that UINMF wrapper is consistent
source("../scripts/initialise.R")
data are features (rows) cells (columns), same as what StabMap wants.
dim(osmFISH)
## [1] 33 5219
dim(rna)
## [1] 29463 15000
osmFISH_filt = osmFISH[,colSums(osmFISH) > 0]
counts_list = list(rna = rna,
osmFISH = osmFISH)
rna_logcounts = assay(logNormCounts(SingleCellExperiment(assays = list(counts = rna))), "logcounts")
osmFISH_logcounts = assay(logNormCounts(
SingleCellExperiment(assays = list(counts = osmFISH_filt)),
size_factors = colSums(osmFISH_filt)),
"logcounts")
assay_list = list(rna = rna_logcounts,
osmFISH = osmFISH_logcounts)
nPCs = 50
StabMap_embedding = stabMapGeneralised(assay_list,
reference_list = "rna",
ncomponentsReference = nPCs,
ncomponentsSubset = nPCs,
projectAll = TRUE,
plot = TRUE)
dim(StabMap_embedding)
## [1] 20185 50
StabMap_umap = calculateUMAP_rnames(StabMap_embedding)
plot(StabMap_umap, col = osm.liger@clusters[rownames(StabMap_umap)])
UINMF_embedding = UINMF_wrap(counts_list = counts_list)
## [1] "Removing 34 cells not expressed any genes in osmFISH."
## [1] "Removing 4910 genes not expressed in rna."
## [1] "Selected 1327 unshared features from rna Dataset"
## [1] "Removing 344 genes not expressed in rna."
## [1] "Performing Factorization using UINMF"
## [1] "Processing"
##
|
| | 0%
|
|===== | 7%
|
|======= | 10%
|
|========= | 13%
|
|============ | 17%
|
|============== | 20%
|
|================ | 23%
|
|=================== | 27%
|
|===================== | 30%
|
|======================= | 33%
|
|========================== | 37%
|
|============================ | 40%
|
|============================== | 43%
|
|================================= | 47%
|
|=================================== | 50%
|
|===================================== | 53%
|
|======================================== | 57%
|
|========================================== | 60%
|
|============================================ | 63%
|
|=============================================== | 67%
|
|================================================= | 70%
|
|=================================================== | 73%
|
|====================================================== | 77%
|
|======================================================== | 80%
|
|========================================================== | 83%
|
|============================================================= | 87%
|
|=============================================================== | 90%
|
|================================================================= | 93%
|
|==================================================================== | 97%
|
|======================================================================| 100%
## Current seed 1 current objective 18890732
## Best results with seed 1.
UINMF_umap = calculateUMAP_rnames(UINMF_embedding)
plot(UINMF_umap, col = osm.liger@clusters[rownames(UINMF_umap)])
Finish
sessionInfo
## function (package = NULL)
## {
## z <- list()
## z$R.version <- R.Version()
## z$platform <- z$R.version$platform
## if (nzchar(.Platform$r_arch))
## z$platform <- paste(z$platform, .Platform$r_arch, sep = "/")
## z$platform <- paste0(z$platform, " (", 8 * .Machine$sizeof.pointer,
## "-bit)")
## z$locale <- Sys.getlocale()
## z$running <- osVersion
## z$RNGkind <- RNGkind()
## if (is.null(package)) {
## package <- grep("^package:", search(), value = TRUE)
## keep <- vapply(package, function(x) x == "package:base" ||
## !is.null(attr(as.environment(x), "path")), NA)
## package <- .rmpkg(package[keep])
## }
## pkgDesc <- lapply(package, packageDescription, encoding = NA)
## if (length(package) == 0)
## stop("no valid packages were specified")
## basePkgs <- sapply(pkgDesc, function(x) !is.null(x$Priority) &&
## x$Priority == "base")
## z$basePkgs <- package[basePkgs]
## if (any(!basePkgs)) {
## z$otherPkgs <- pkgDesc[!basePkgs]
## names(z$otherPkgs) <- package[!basePkgs]
## }
## loadedOnly <- loadedNamespaces()
## loadedOnly <- loadedOnly[!(loadedOnly %in% package)]
## if (length(loadedOnly)) {
## names(loadedOnly) <- loadedOnly
## pkgDesc <- c(pkgDesc, lapply(loadedOnly, packageDescription))
## z$loadedOnly <- pkgDesc[loadedOnly]
## }
## z$matprod <- as.character(options("matprod"))
## es <- extSoftVersion()
## z$BLAS <- as.character(es["BLAS"])
## z$LAPACK <- La_library()
## l10n <- l10n_info()
## if (!is.null(l10n["system.codepage"]))
## z$system.codepage <- as.character(l10n["system.codepage"])
## if (!is.null(l10n["codepage"]))
## z$codepage <- as.character(l10n["codepage"])
## class(z) <- "sessionInfo"
## z
## }
## <bytecode: 0x7fd14c6e6388>
## <environment: namespace:utils>